

<!DOCTYPE html>
<html lang="zh-CN" data-default-color-scheme=auto>



<head>
  <meta charset="UTF-8">
  <link rel="apple-touch-icon" sizes="76x76" href="/img/fluid.png">
  <link rel="icon" href="/img/favicon.ico">
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=5.0, shrink-to-fit=no">
  <meta http-equiv="x-ua-compatible" content="ie=edge">
  
  <meta name="theme-color" content="#2f4154">
  <meta name="author" content="xuehaoweng">
  <meta name="keywords" content="">
  
    <meta name="description" content="引言在当今高速发展的云计算时代，对于系统资源的监控变得尤为重要。像 CPU 和内存这样的计算资源，是支撑应用程序运行的基础。一旦这些资源出现瓶颈，将直接影响到应用的性能和稳定性。因此，构建一个有效的监控告警系统，能够实时监测资源使用情况，并在异常发生时及时发出警报，成为了云原生环境中不可或缺的一环。本文将引导您了解如何使用 Prometheus 和 Alertmanager 来构建这样一个监控系统">
<meta property="og:type" content="article">
<meta property="og:title" content="云原生监控入门：使用Prometheus、Alertmanager 实现Cpu和内存的监控告警">
<meta property="og:url" content="http://blog.xhweng.cn/2024/03/27/%E4%BA%91%E5%8E%9F%E7%94%9F%E7%9B%91%E6%8E%A7%E5%85%A5%E9%97%A8%EF%BC%9A%E4%BD%BF%E7%94%A8Prometheus%E3%80%81Alertmanager%20%E5%AE%9E%E7%8E%B0Cpu%E5%92%8C%E5%86%85%E5%AD%98%E7%9A%84%E7%9B%91%E6%8E%A7%E5%91%8A%E8%AD%A6/index.html">
<meta property="og:site_name" content="有追求的开发者">
<meta property="og:description" content="引言在当今高速发展的云计算时代，对于系统资源的监控变得尤为重要。像 CPU 和内存这样的计算资源，是支撑应用程序运行的基础。一旦这些资源出现瓶颈，将直接影响到应用的性能和稳定性。因此，构建一个有效的监控告警系统，能够实时监测资源使用情况，并在异常发生时及时发出警报，成为了云原生环境中不可或缺的一环。本文将引导您了解如何使用 Prometheus 和 Alertmanager 来构建这样一个监控系统">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="http://blog.xhweng.cn/img/file_img/prometheus.jpg">
<meta property="article:published_time" content="2024-03-27T02:15:50.000Z">
<meta property="article:modified_time" content="2024-03-27T02:59:27.759Z">
<meta property="article:author" content="xuehaoweng">
<meta property="article:tag" content="监控">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:image" content="http://blog.xhweng.cn/img/file_img/prometheus.jpg">
  
  
    <meta name="referrer" content="no-referrer-when-downgrade">
  
  
  <title>云原生监控入门：使用Prometheus、Alertmanager 实现Cpu和内存的监控告警 - 有追求的开发者</title>

  <link  rel="stylesheet" href="https://lib.baomitu.com/twitter-bootstrap/4.6.1/css/bootstrap.min.css" />



  <link  rel="stylesheet" href="https://lib.baomitu.com/github-markdown-css/4.0.0/github-markdown.min.css" />

  <link  rel="stylesheet" href="https://lib.baomitu.com/hint.css/2.7.0/hint.min.css" />

  <link  rel="stylesheet" href="https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.css" />



<!-- 主题依赖的图标库，不要自行修改 -->
<!-- Icon libraries required by the theme; do not modify these links -->

<link rel="stylesheet" href="//at.alicdn.com/t/font_1749284_hj8rtnfg7um.css">



<link rel="stylesheet" href="//at.alicdn.com/t/font_1736178_lbnruvf0jn.css">


<link  rel="stylesheet" href="/css/main.css" />


  <link id="highlight-css" rel="stylesheet" href="/css/highlight.css" />
  
    <link id="highlight-css-dark" rel="stylesheet" href="/css/highlight-dark.css" />
  




  <script id="fluid-configs">
    // Bootstraps the page-level globals used by the inline scripts below:
    //   Fluid  - shared namespace; reuses window.Fluid when it already exists.
    //   CONFIG - configuration object serialized into the page as a literal.
    var Fluid = window.Fluid || {};
    // Shallow-copy any existing Fluid.ctx into a fresh object (an empty object when ctx is undefined).
    Fluid.ctx = Object.assign({}, Fluid.ctx)
    var CONFIG = {"hostname":"blog.xhweng.cn","root":"/","version":"1.9.7","typing":{"enable":true,"typeSpeed":70,"cursorChar":"_","loop":false,"scope":[]},"anchorjs":{"enable":true,"element":"h1,h2,h3,h4,h5,h6","placement":"left","visible":"hover","icon":""},"progressbar":{"enable":true,"height_px":3,"color":"#29d","options":{"showSpinner":false,"trickleSpeed":100}},"code_language":{"enable":true,"default":"TEXT"},"copy_btn":true,"image_caption":{"enable":true},"image_zoom":{"enable":true,"img_url_replace":["",""]},"toc":{"enable":true,"placement":"right","headingSelector":"h1,h2,h3,h4,h5,h6","collapseDepth":0},"lazyload":{"enable":true,"loading_img":"/img/loading.gif","onlypost":false,"offset_factor":2},"web_analytics":{"enable":true,"follow_dnt":true,"baidu":"b3bbb822b3d2a5e2761107c03cf72b8c","google":{"measurement_id":null},"tencent":{"sid":null,"cid":null},"woyaola":null,"cnzz":null,"leancloud":{"app_id":null,"app_key":null,"server_url":null,"path":"window.location.pathname","ignore_local":false}},"search_path":"/local-search.xml","include_content_in_search":true};

    // When follow_dnt is enabled, record the browser's Do Not Track preference in
    // Fluid.ctx.dnt. The analytics snippets further down check this flag before loading.
    if (CONFIG.web_analytics.follow_dnt) {
      // The signal is read from three possible properties; the first truthy one wins.
      var dntVal = navigator.doNotTrack || window.doNotTrack || navigator.msDoNotTrack;
      // Truthy only when the value starts with '1', 'yes' or 'on'. If no signal is
      // present, dnt keeps the falsy dntVal (e.g. null/undefined) rather than `false`.
      Fluid.ctx.dnt = dntVal && (dntVal.startsWith('1') || dntVal.startsWith('yes') || dntVal.startsWith('on'));
    }
  </script>
  <script  src="/js/utils.js" ></script>
  <script  src="/js/color-schema.js" ></script>
  

  
    <!-- Baidu Analytics -->
    <!-- Inline script: the `async` attribute is only valid together with `src`, so it is omitted here. -->
    <script>
      // Load the Baidu Tongji tracker unless the visitor's Do Not Track flag
      // (Fluid.ctx.dnt, computed in the fluid-configs script) is set.
      if (!Fluid.ctx.dnt) {
        // Global command queue; reuse an existing one if some other script created it first.
        var _hmt = _hmt || [];
        (function() {
          var hm = document.createElement("script");
          hm.src = "https://hm.baidu.com/hm.js?b3bbb822b3d2a5e2761107c03cf72b8c";
          // Insert the tracker before the first <script> element of the document.
          var s = document.getElementsByTagName("script")[0];
          s.parentNode.insertBefore(hm, s);
        })();
      }
    </script>
  

  
    <!-- Google tag (gtag.js) -->
    <!-- Inline script: the `async` attribute is only valid together with `src`, so it is omitted here. -->
    <script>
      // Load gtag.js only when a measurement id is configured and the visitor has not
      // opted out via Do Not Track. Without an id the request to googletagmanager.com
      // cannot record anything, so it is skipped entirely.
      (function() {
        var googleCfg = CONFIG.web_analytics.google;
        var measurementId = googleCfg && googleCfg.measurement_id;
        if (Fluid.ctx.dnt || !measurementId) {
          return;
        }
        Fluid.utils.createScript("https://www.googletagmanager.com/gtag/js?id=" + encodeURIComponent(measurementId), function() {
          window.dataLayer = window.dataLayer || [];
          // gtag forwards its raw `arguments` object to the dataLayer queue.
          function gtag() {
            dataLayer.push(arguments);
          }
          gtag('js', new Date());
          gtag('config', measurementId);
        });
      })();
    </script>
  

  

  

  

  



  
<meta name="generator" content="Hexo 7.0.0"></head>


<body>
  

  <header>
    

<div class="header-inner" style="height: 70vh;">
  <nav id="navbar" class="navbar fixed-top  navbar-expand-lg navbar-dark scrolling-navbar">
  <!-- Site navigation bar: brand link, collapse toggler for small screens, and the main menu. -->
  <div class="container">
    <a class="navbar-brand" href="/">
      <strong>有追求的开发者</strong>
    </a>

    <!-- Toggler for the collapsible menu below (targets #navbarSupportedContent). -->
    <button id="navbar-toggler-btn" class="navbar-toggler" type="button" data-toggle="collapse"
            data-target="#navbarSupportedContent"
            aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
      <div class="animated-icon"><span></span><span></span><span></span></div>
    </button>

    <!-- Collapsible content -->
    <!-- Icon-font glyphs are decorative, so they are hidden from assistive technology;
         the adjacent text or the link's aria-label provides the accessible name. -->
    <div class="collapse navbar-collapse" id="navbarSupportedContent">
      <ul class="navbar-nav ml-auto text-center">
        <li class="nav-item">
          <a class="nav-link" href="/" target="_self">
            <i class="iconfont icon-home-fill" aria-hidden="true"></i>
            <span>首页</span>
          </a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="/archives/" target="_self">
            <i class="iconfont icon-archive-fill" aria-hidden="true"></i>
            <span>归档</span>
          </a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="/categories/" target="_self">
            <i class="iconfont icon-category-fill" aria-hidden="true"></i>
            <span>分类</span>
          </a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="/tags/" target="_self">
            <i class="iconfont icon-tags-fill" aria-hidden="true"></i>
            <span>标签</span>
          </a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="/about/" target="_self">
            <i class="iconfont icon-user-fill" aria-hidden="true"></i>
            <span>关于</span>
          </a>
        </li>
        <!-- Search trigger; data-target points at #modalSearch, which is defined outside this block. -->
        <li class="nav-item" id="search-btn">
          <a class="nav-link" target="_self" href="javascript:;" data-toggle="modal" data-target="#modalSearch" aria-label="Search">
            <i class="iconfont icon-search" aria-hidden="true"></i>
          </a>
        </li>
        <!-- Color scheme toggle; presumably wired up by /js/color-schema.js (TODO confirm). -->
        <li class="nav-item" id="color-toggle-btn">
          <a class="nav-link" target="_self" href="javascript:;" aria-label="Color Toggle">
            <i class="iconfont icon-dark" id="color-toggle-icon" aria-hidden="true"></i>
          </a>
        </li>
      </ul>
    </div>
  </div>
</nav>

  

<!-- Page banner: background image, dark overlay, post title and post metadata. -->
<!-- NOTE(review): the non-standard `parallax` attribute is presumably read by a theme script; confirm before renaming. -->
<div id="banner" class="banner" parallax="true"
     style="background: url('/img/default.png') no-repeat center center; background-size: cover;">
  <div class="full-bg-img">
    <div class="mask flex-center" style="background-color: rgba(0, 0, 0, 0.3)">
      <div class="banner-text text-center fade-in-up">
        <div class="h2">
          <!-- Left empty on purpose; the title lives in data-typed-text (presumably typed in by the theme's typing effect, TODO confirm). -->
          <span id="subtitle" data-typed-text="云原生监控入门：使用Prometheus、Alertmanager 实现Cpu和内存的监控告警"></span>
        </div>

        <!-- Publication date -->
        <div class="mt-3">
          <span class="post-meta">
            <i class="iconfont icon-date-fill" aria-hidden="true"></i>
            <time datetime="2024-03-27 10:15" pubdate>
              2024年3月27日 上午
            </time>
          </span>
        </div>

        <!-- Word count and estimated reading time; icons are decorative and hidden from assistive technology. -->
        <div class="mt-1">
          <span class="post-meta mr-2">
            <i class="iconfont icon-chart" aria-hidden="true"></i>
            3.3k 字
          </span>
          <span class="post-meta mr-2">
            <i class="iconfont icon-clock-fill" aria-hidden="true"></i>
            28 分钟
          </span>
        </div>
      </div>
    </div>
  </div>
</div>

</div>

  </header>

  <main>
    
      

<div class="container-fluid nopadding-x">
  <div class="row nomargin-x">
    <div class="side-col d-none d-lg-block col-lg-2">
      

    </div>

    <div class="col-lg-8 nopadding-x-md">
      <div class="container nopadding-x-md" id="board-ctn">
        <div id="board">
          <article class="post-content mx-auto">
            <h1 id="seo-header">云原生监控入门：使用Prometheus、Alertmanager 实现Cpu和内存的监控告警</h1>
            
            
              <div class="markdown-body">
                
                <h1 id="引言"><a href="#引言" class="headerlink" title="引言"></a>引言</h1><p>在当今高速发展的云计算时代，对于系统资源的监控变得尤为重要。像 CPU 和内存这样的计算资源，是支撑应用程序运行的基础。一旦这些资源出现瓶颈，将直接影响到应用的性能和稳定性。因此，构建一个有效的监控告警系统，能够实时监测资源使用情况，并在异常发生时及时发出警报，成为了云原生环境中不可或缺的一环。本文将引导您了解如何使用 Prometheus 和 Alertmanager 来构建这样一个监控系统，以实现对 CPU 和内存使用的实时监控和告警。</p>
<p>首先，我们需要了解 Prometheus 是什么。</p>
<h1 id="Prometheus-是什么？"><a href="#Prometheus-是什么？" class="headerlink" title="Prometheus 是什么？"></a>Prometheus 是什么？</h1><p><img src="https://files.mdnice.com/user/33721/c4332dcb-450f-4504-ab91-bc06137a1db6.jpeg" srcset="/img/loading.gif" lazyload alt="Prometheus"></p>
<p>Prometheus 是一款开源的监控系统，它的核心功能包括多维度数据模型、灵活的查询语言以及不依赖分布式存储等。Prometheus 通过 Pull 模式抓取指标数据，这意味着服务需要暴露一个端口提供指标信息，而 Prometheus 服务器则会周期性地从这个端口抓取数据。</p>
<h2 id="Prometheus-的优势"><a href="#Prometheus-的优势" class="headerlink" title="Prometheus 的优势"></a>Prometheus 的优势</h2><p>Prometheus 是一个开源的完整监控解决方案，其对传统监控系统的测试和告警模型进行了彻底的颠覆，形成了基于中央化的规则计算、统一分析和告警的新模型。 相比于传统监控系统 Prometheus 具有以下优点：</p>
<h2 id="Prometheus-易于管理"><a href="#Prometheus-易于管理" class="headerlink" title="Prometheus 易于管理"></a>Prometheus 易于管理</h2><p>Prometheus 核心部分只有一个单独的二进制文件，不存在任何的第三方依赖(数据库，缓存等等)。唯一需要的就是本地磁盘，因此不会有潜在级联故障的风险。</p>
<p>Prometheus 基于 Pull 模型的架构方式，可以在任何地方（本地电脑，开发环境，测试环境）搭建我们的监控系统。对于一些复杂的情况，还可以使用 Prometheus 服务发现(Service Discovery)的能力动态管理监控目标。</p>
<h2 id="监控服务的内部运行状态"><a href="#监控服务的内部运行状态" class="headerlink" title="监控服务的内部运行状态"></a>监控服务的内部运行状态</h2><p>Prometheus 鼓励用户监控服务的内部状态，基于 Prometheus 丰富的 Client 库，用户可以轻松地在应用程序中添加对 Prometheus 的支持，从而让用户可以获取服务和应用内部真正的运行状态。</p>
<p><img src="https://files.mdnice.com/user/33721/e35511e5-54c0-4fec-875c-884916b2fdf8.png" srcset="/img/loading.gif" lazyload alt="Alerts"></p>
<p><img src="https://files.mdnice.com/user/33721/f81ef482-f851-402b-a241-c0c770b120f8.png" srcset="/img/loading.gif" lazyload alt="Targets"></p>
<h2 id="强大的数据模型"><a href="#强大的数据模型" class="headerlink" title="强大的数据模型"></a>强大的数据模型</h2><p>所有采集的监控数据均以指标(metric)的形式保存在内置的时间序列数据库当中(TSDB)。所有的样本除了基本的指标名称以外，还包含一组用于描述该样本特征的标签。</p>
<figure class="highlight dart"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs dart">http_request_status&#123;code=<span class="hljs-string">&#x27;200&#x27;</span>,content_path=<span class="hljs-string">&#x27;/api/path&#x27;</span>, environment=<span class="hljs-string">&#x27;produment&#x27;</span>&#125; =&gt; [value1<span class="hljs-meta">@timestamp</span>1,value2<span class="hljs-meta">@timestamp</span>2...]<br>http_request_status&#123;code=<span class="hljs-string">&#x27;200&#x27;</span>,content_path=<span class="hljs-string">&#x27;/api/path2&#x27;</span>, environment=<span class="hljs-string">&#x27;produment&#x27;</span>&#125; =&gt; [value1<span class="hljs-meta">@timestamp</span>1,value2<span class="hljs-meta">@timestamp</span>2...]<br></code></pre></td></tr></table></figure>

<p><img src="https://files.mdnice.com/user/33721/945df78a-07df-4e5a-aaa6-2b67b313ad61.png" srcset="/img/loading.gif" lazyload alt="image"></p>
<p>每一条时间序列由指标名称(Metrics Name)以及一组标签(Labels)唯一标识。每条时间序列按照时间的先后顺序存储一系列的样本值。</p>
<p>表示维度的标签可能来源于你的监控对象的状态，比如 code&#x3D;404 或者 content_path&#x3D;&#x2F;api&#x2F;path。也可能来源于你的环境定义，比如 environment&#x3D;produment。基于这些 Labels 我们可以方便地对监控数据进行聚合，过滤，裁剪。</p>
<h2 id="强大的查询语言-PromQL"><a href="#强大的查询语言-PromQL" class="headerlink" title="强大的查询语言 PromQL"></a>强大的查询语言 PromQL</h2><p>Prometheus 内置了一个强大的数据查询语言 PromQL。 通过 PromQL 可以实现对监控数据的查询、聚合。同时 PromQL 也被应用于数据可视化(如 Grafana)以及告警当中。</p>
<p><img src="https://files.mdnice.com/user/33721/97eefd3a-5918-47aa-b7c5-4f1580a07050.png" srcset="/img/loading.gif" lazyload alt="node_cpu_seconds_total"></p>
<p>通过 PromQL 可以轻松回答类似于以下问题：</p>
<ul>
<li>在过去一段时间中 95%应用延迟时间的分布范围？</li>
<li>预测在 4 小时后，磁盘空间占用大致会是什么情况？</li>
<li>CPU 占用率前 5 位的服务有哪些？(过滤)</li>
</ul>
<h2 id="高效"><a href="#高效" class="headerlink" title="高效"></a>高效</h2><p>对于监控系统而言，大量的监控任务必然导致有大量的数据产生。而 Prometheus 可以高效地处理这些数据，对于单一 Prometheus Server 实例而言它可以处理：</p>
<ul>
<li>数以百万的监控指标</li>
<li>每秒处理数十万的数据点。</li>
</ul>
<h2 id="可扩展"><a href="#可扩展" class="headerlink" title="可扩展"></a>可扩展</h2><p>Prometheus 是如此简单，因此你可以在每个数据中心、每个团队运行独立的 Prometheus Server。Prometheus 对于联邦集群的支持，可以让多个 Prometheus 实例产生一个逻辑集群，当单实例 Prometheus Server 处理的任务量过大时，通过使用功能分区(sharding)+联邦集群(federation)可以对其进行扩展。</p>
<h2 id="易于集成"><a href="#易于集成" class="headerlink" title="易于集成"></a>易于集成</h2><p>使用 Prometheus 可以快速搭建监控服务，并且可以非常方便地在应用程序中进行集成。目前支持： Java， JMX， Python， Go，Ruby， .Net， Node.js 等等语言的客户端 SDK，基于这些 SDK 可以快速让应用程序纳入到 Prometheus 的监控当中，或者开发自己的监控数据收集程序。同时这些客户端收集的监控数据，不仅仅支持 Prometheus，还能支持 Graphite 这些其他的监控工具。</p>
<p>同时 Prometheus 还支持与其他的监控系统进行集成：Graphite， StatsD， Collectd， Scollector， Munin， Nagios 等。</p>
<p>Prometheus 社区还提供了大量第三方实现的监控数据采集支持：JMX， CloudWatch， EC2， MySQL， PostgreSQL， Haskell， Bash， SNMP， Consul， HAProxy， Mesos， Bind， CouchDB， Django， Memcached， RabbitMQ， Redis， RethinkDB， Rsyslog 等等。</p>
<h2 id="可视化"><a href="#可视化" class="headerlink" title="可视化"></a>可视化</h2><p>Prometheus Server 中自带了一个 Prometheus UI，通过这个 UI 可以方便地直接对数据进行查询，并且支持直接以图形化的形式展示数据。同时 Prometheus 还提供了一个独立的基于 Ruby On Rails 的 Dashboard 解决方案 Promdash。最新的 Grafana 可视化工具也已经提供了完整的 Prometheus 支持，基于 Grafana 可以创建更加精美的监控图表。基于 Prometheus 提供的 API 还可以实现自己的监控可视化 UI。</p>
<p><img src="https://files.mdnice.com/user/33721/4afb2425-e7f9-4be1-90cc-ba6734f7635a.png" srcset="/img/loading.gif" lazyload alt="grafana"></p>
<p><img src="https://files.mdnice.com/user/33721/9bfc40c2-a6e0-44e8-ac42-3baff9e0fdf2.png" srcset="/img/loading.gif" lazyload alt="主机基础监控"></p>
<h2 id="开放性"><a href="#开放性" class="headerlink" title="开放性"></a>开放性</h2><p>通常来说当我们需要监控一个应用程序时，一般需要该应用程序提供对相应监控系统协议的支持。因此应用程序会与所选择的监控系统进行绑定。为了减少这种绑定所带来的限制。对于决策者而言要么你就直接在应用中集成该监控系统的支持，要么就在外部创建单独的服务来适配不同的监控系统。</p>
<p>而对于 Prometheus 来说，使用 Prometheus 的 client library 的输出格式不止支持 Prometheus 的格式化数据，也可以输出支持其它监控系统的格式化数据，比如 Graphite。</p>
<p>因此你甚至可以在不使用 Prometheus 的情况下，采用 Prometheus 的 client library 来让你的应用程序支持监控数据采集。</p>
<h1 id="如何利用-Prometheus-来监控-CPU-和内存"><a href="#如何利用-Prometheus-来监控-CPU-和内存" class="headerlink" title="如何利用 Prometheus 来监控 CPU 和内存"></a>如何利用 Prometheus 来监控 CPU 和内存</h1><p>在 Prometheus 中，可以通过节点导出器（node_exporter）来收集主机级别的监控指标，包括 CPU 和内存的使用情况。节点导出器是一个官方提供的用于暴露硬件和操作系统级别指标的工具。部署并配置好节点导出器后，Prometheus 就能开始抓取相关的监控数据。</p>
<figure class="highlight nestedtext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span 
class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br></pre></td><td class="code"><pre><code class="hljs nestedtext"><span class="hljs-attribute">version</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&#x27;3&#x27;</span><br><br><span class="hljs-attribute">services</span><span class="hljs-punctuation">:</span><br>  <span class="hljs-attribute">components-prometheus</span><span class="hljs-punctuation">:</span><br>    <span class="hljs-attribute">image</span><span class="hljs-punctuation">:</span> <span class="hljs-string">prom/prometheus:v2.30.3</span><br>    <span class="hljs-attribute">container_name</span><span class="hljs-punctuation">:</span> <span class="hljs-string">components-prometheus</span><br>    <span class="hljs-attribute">deploy</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-attribute">resources</span><span class="hljs-punctuation">:</span><br>        <span class="hljs-attribute">limits</span><span class="hljs-punctuation">:</span><br>          <span 
class="hljs-attribute">cpus</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&#x27;0.8&#x27;</span><br>          <span class="hljs-attribute">memory</span><span class="hljs-punctuation">:</span> <span class="hljs-string">12G</span><br>    <span class="hljs-attribute">ports</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">9090:9090</span><br>    <span class="hljs-comment">#privileged: true</span><br>    <span class="hljs-attribute">volumes</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">./prometheus.yml:/etc/prometheus/prometheus.yml</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">./data:/prometheus</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/app/ca/server/server.crt:/etc/prometheus/server.crt</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/app/ca/server/server.key:/etc/prometheus/server.key</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/app/ca/root/ca.crt:/etc/prometheus/ca.crt</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">./rules:/etc/prometheus/rules</span><br>      <span class="hljs-comment">#- /data/prometheus/web-config.yml:/etc/prometheus/web-config.yml</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/etc/localtime:/etc/localtime:ro</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/etc/timezone:/etc/timezone:ro</span><br>    <span class="hljs-attribute">command</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;--config.file=/etc/prometheus/prometheus.yml&#x27;</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;--storage.tsdb.path=/prometheus&#x27;</span><br>    <span 
class="hljs-attribute">depends_on</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">node-exporter</span><br>    <span class="hljs-attribute">environment</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">TZ=Asia/Shanghai</span><br>    <span class="hljs-attribute">networks</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">monitoring</span><br>  <span class="hljs-attribute">components-node-exporter</span><span class="hljs-punctuation">:</span><br>    <span class="hljs-attribute">image</span><span class="hljs-punctuation">:</span> <span class="hljs-string">prom/node-exporter:v1.2.2</span><br>    <span class="hljs-attribute">container_name</span><span class="hljs-punctuation">:</span> <span class="hljs-string">components-node-exporter</span><br>    <span class="hljs-comment">#privileged: true</span><br>    <span class="hljs-attribute">ports</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">9100:9100</span><br>    <span class="hljs-attribute">volumes</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/proc:/host/proc:ro</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/sys:/host/sys:ro</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/:/rootfs:ro</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/etc/localtime:/etc/localtime:ro</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/etc/timezone:/etc/timezone:ro</span><br>    <span class="hljs-attribute">command</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;--path.procfs=/host/proc&#x27;</span><br>      <span 
class="hljs-bullet">-</span> <span class="hljs-string">&#x27;--path.sysfs=/host/sys&#x27;</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;--collector.filesystem.ignored-mount-points&#x27;</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;^/(sys|proc|dev|host|etc)($$|/)&#x27;</span><br>    <span class="hljs-attribute">networks</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">monitoring</span><br>    <span class="hljs-attribute">environment</span><span class="hljs-punctuation">:</span><br>     <span class="hljs-bullet">-</span> <span class="hljs-string">TZ=Asia/Shanghai</span><br>  <span class="hljs-attribute">components-alertmanager</span><span class="hljs-punctuation">:</span><br>    <span class="hljs-comment">#volumes:</span><br>      <span class="hljs-comment">#- ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml</span><br>    <span class="hljs-attribute">command</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;--config.file=/etc/alertmanager/alertmanager.yml&#x27;</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;--cluster.advertise-address=10.254.0.192:9093&#x27;</span><br>    <span class="hljs-attribute">ports</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">9093:9093</span><br>    <span class="hljs-attribute">image</span><span class="hljs-punctuation">:</span> <span class="hljs-string">prom/alertmanager</span><br>    <span class="hljs-attribute">container_name</span><span class="hljs-punctuation">:</span> <span class="hljs-string">components-alertmanager</span><br>    <span class="hljs-attribute">networks</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">monitoring</span><br>  
  <span class="hljs-attribute">volumes</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/etc/localtime:/etc/localtime</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string"> /etc/timezone:/etc/timezone</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml</span><br>    <span class="hljs-attribute">environment</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">TZ=Asia/Shanghai</span><br><br>  <span class="hljs-attribute">components-grafana</span><span class="hljs-punctuation">:</span><br>    <span class="hljs-attribute">image</span><span class="hljs-punctuation">:</span> <span class="hljs-string">grafana/grafana</span><br>    <span class="hljs-attribute">hostname</span><span class="hljs-punctuation">:</span> <span class="hljs-string">grafana</span><br>    <span class="hljs-attribute">container_name</span><span class="hljs-punctuation">:</span> <span class="hljs-string">components-grafana</span><br>    <span class="hljs-attribute">ports</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">3000:3000</span><br>    <span class="hljs-attribute">volumes</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">/etc/localtime:/etc/localtime:ro</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string"> /etc/timezone:/etc/timezone:ro</span><br>    <span class="hljs-attribute">environment</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">TZ=Asia/Shanghai</span><br>    <span class="hljs-attribute">networks</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span 
class="hljs-string">monitoring</span><br><span class="hljs-attribute">networks</span><span class="hljs-punctuation">:</span><br>  <span class="hljs-attribute">monitoring</span><span class="hljs-punctuation">:</span><br>    <span class="hljs-attribute">name</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;monitoring&quot;</span><br>    <span class="hljs-attribute">ipam</span><span class="hljs-punctuation">:</span><br>        <span class="hljs-attribute">config</span><span class="hljs-punctuation">:</span><br>        <span class="hljs-bullet">-</span> <span class="hljs-string">subnet: 172.240.0.0/16</span><br></code></pre></td></tr></table></figure>

<p><strong>prometheus.yml</strong></p>
<figure class="highlight nestedtext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br></pre></td><td class="code"><pre><code class="hljs nestedtext"><span class="hljs-attribute">global</span><span class="hljs-punctuation">:</span><br>  <span class="hljs-attribute">scrape_interval</span><span class="hljs-punctuation">:</span> <span class="hljs-string"> 15s</span><br>  <span class="hljs-attribute">scrape_timeout</span><span class="hljs-punctuation">:</span> <span class="hljs-string">10s</span><br>  <span class="hljs-attribute">evaluation_interval</span><span class="hljs-punctuation">:</span> <span class="hljs-string">15s</span><br><br><span class="hljs-comment">#rule</span><br><span class="hljs-attribute">rule_files</span><span class="hljs-punctuation">:</span><br>  <span class="hljs-bullet">-</span> <span class="hljs-string">./rules/*.rules</span><br><br><span class="hljs-attribute">scrape_configs</span><span class="hljs-punctuation">:</span><br>  <span class="hljs-bullet">-</span> <span 
class="hljs-string">job_name: &#x27;prometheus&#x27;</span><br>    <span class="hljs-attribute">static_configs</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">targets: [&#x27;server_ip:9090&#x27;]</span><br>        <span class="hljs-attribute">labels</span><span class="hljs-punctuation">:</span><br>          <span class="hljs-attribute">instance</span><span class="hljs-punctuation">:</span> <span class="hljs-string">prometheus</span><br><br><br>  <span class="hljs-bullet">-</span> <span class="hljs-string">job_name: &#x27;node_export&#x27;</span><br>    <span class="hljs-attribute">static_configs</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">targets: [&#x27;server_ip:9100&#x27;]</span><br>        <span class="hljs-attribute">labels</span><span class="hljs-punctuation">:</span><br>          <span class="hljs-attribute">instance</span><span class="hljs-punctuation">:</span> <span class="hljs-string">node_export</span><br><br><br><span class="hljs-comment">#alarm</span><br><span class="hljs-attribute">alerting</span><span class="hljs-punctuation">:</span><br>  <span class="hljs-attribute">alertmanagers</span><span class="hljs-punctuation">:</span><br>    <span class="hljs-bullet">-</span> <span class="hljs-string">static_configs:</span><br>        <span class="hljs-bullet">-</span> <span class="hljs-string">targets: [&#x27;server_ip:9093&#x27;]</span><br>    <span class="hljs-comment">#- scheme: http</span><br>    <span class="hljs-comment">#- timeout: 10s</span><br></code></pre></td></tr></table></figure>

<p><img src="https://files.mdnice.com/user/33721/c6b56740-024d-41b5-99f1-bc6a91163d01.png" srcset="/img/loading.gif" lazyload alt="Metric"></p>
<p>然而，仅有监控数据是不够的，我们还需要设置告警机制以便在资源使用达到阈值时得到通知。这就是 Alertmanager 发挥作用的地方。Alertmanager 是 Prometheus 生态中的一个组件，主要用于处理由 Prometheus 发出的告警信息。它可以进行告警去重、分组、路由以及静默等操作，帮助我们更智能地管理告警信息。</p>
<p><img src="https://files.mdnice.com/user/33721/ff1c40db-7d63-4a73-a5db-91fe117f556f.png" srcset="/img/loading.gif" lazyload alt="image"></p>
<h1 id="如何利用-Alertmanager-实现告警"><a href="#如何利用-Alertmanager-实现告警" class="headerlink" title="如何利用 Alertmanager 实现告警"></a>如何利用 Alertmanager 实现告警</h1><p>为了实现 CPU 和内存的监控告警，我们需要在 Prometheus 中定义相应的告警规则。这些规则会基于预设的阈值来判断何时触发告警。例如，我们可以设置当 CPU 使用率超过 80%或内存使用超过 70%时发出告警。告警规则的配置非常灵活，可以根据实际需求进行调整。</p>
<p>具体实施步骤如下：</p>
<ol>
<li>部署 Prometheus 和节点导出器。确保 Prometheus 可以抓取到 CPU 和内存的监控数据。</li>
<li>在 Prometheus 中配置告警规则，为 CPU 和内存使用设置合理的阈值。</li>
<li>部署 Alertmanager，并将其与 Prometheus 集成。</li>
<li>配置 Alertmanager 的规则，定义告警的通知方式，如邮件、Slack 或其他即时通讯工具。</li>
<li>测试告警机制，确保在资源使用超过阈值时能够收到告警。</li>
</ol>
<p><strong>alert.rules</strong></p>
<figure class="highlight nestedtext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs nestedtext"><span class="hljs-attribute">groups</span><span class="hljs-punctuation">:</span><br>  <span class="hljs-bullet">-</span> <span class="hljs-string">name: server-status</span><br>    <span class="hljs-attribute">rules</span><span class="hljs-punctuation">:</span><br>      <span class="hljs-bullet">-</span> <span class="hljs-string">alert: HighMemoryUsage</span><br>        <span class="hljs-attribute">expr</span><span class="hljs-punctuation">:</span> <span class="hljs-string">(100 - ((node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100))&gt;80</span><br>        <span class="hljs-attribute">for</span><span class="hljs-punctuation">:</span> <span class="hljs-string">1m</span><br>        <span class="hljs-attribute">labels</span><span class="hljs-punctuation">:</span><br>          <span class="hljs-attribute">severity</span><span class="hljs-punctuation">:</span> <span class="hljs-string">critical</span><br>        <span class="hljs-attribute">annotations</span><span class="hljs-punctuation">:</span><br>          <span class="hljs-attribute">summary</span><span class="hljs-punctuation">:</span> <span class="hljs-string">服务器内存告警</span><br>          <span class="hljs-attribute">description</span><span class="hljs-punctuation">:</span> <span class="hljs-string">服务器内存利用率超过80%</span><br></code></pre></td></tr></table></figure>

<p><strong>alertmanager.yml</strong></p>
<figure class="highlight yaml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br></pre></td><td class="code"><pre><code class="hljs yaml"><span class="hljs-attr">global:</span><br>  <span class="hljs-attr">resolve_timeout:</span> <span class="hljs-string">5m</span><br>  <span class="hljs-attr">smtp_smarthost:</span> <span class="hljs-string">&#x27;smtp.qq.com:465&#x27;</span><br>  <span class="hljs-attr">smtp_from:</span> <span class="hljs-string">&#x27;abc@qq.com&#x27;</span> <span class="hljs-comment"># 发送告警的邮箱</span><br>  <span class="hljs-attr">smtp_auth_username:</span> <span class="hljs-string">&#x27;abc@qq.com&#x27;</span>  <span class="hljs-comment">#发送告警的邮箱</span><br>  <span class="hljs-attr">smtp_auth_password:</span> <span class="hljs-string">&#x27;aaaaa&#x27;</span> <span class="hljs-comment">#邮箱授权密码</span><br>  <span class="hljs-attr">smtp_require_tls:</span> <span class="hljs-literal">false</span><br><br><span class="hljs-attr">route:</span><br>  <span class="hljs-attr">group_by:</span> [<span class="hljs-string">&#x27;alertname&#x27;</span>]<br>  <span class="hljs-attr">group_wait:</span> <span 
class="hljs-string">10s</span> <span class="hljs-comment"># 告警等待时间。告警产生后等待10s，如果有同组告警一起发出</span><br>  <span class="hljs-attr">group_interval:</span> <span class="hljs-string">5m</span>  <span class="hljs-comment"># 两组告警的间隔时间</span><br>  <span class="hljs-attr">repeat_interval:</span> <span class="hljs-string">5m</span> <span class="hljs-comment">#重复告警的间隔时间，减少相同邮件的发送频率 此处为测试设置为5m</span><br>  <span class="hljs-attr">receiver:</span> <span class="hljs-string">&#x27;email-notifications&#x27;</span>   <span class="hljs-comment"># 默认接收者  routes: # 指定哪些组可以接收消息</span><br><span class="hljs-attr">templates:</span><br><span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;/tmp/prom/alertmanager/template/email.tmpl&#x27;</span><br><span class="hljs-attr">receivers:</span><br><span class="hljs-bullet">-</span> <span class="hljs-attr">name:</span> <span class="hljs-string">&#x27;email-notifications&#x27;</span><br>  <span class="hljs-attr">email_configs:</span><br>  <span class="hljs-bullet">-</span> <span class="hljs-attr">to:</span> <span class="hljs-string">&#x27;abc@163.com&#x27;</span><br>    <span class="hljs-attr">send_resolved:</span> <span class="hljs-literal">true</span><br>    <span class="hljs-attr">headers:</span> &#123; <span class="hljs-attr">Subject:</span> <span class="hljs-string">&quot; 【监控告警】 <span class="hljs-template-variable">&#123;&#123; .CommonLabels.alertname &#125;&#125;</span> &quot;</span> &#125; <span class="hljs-comment">#标题</span><br>    <span class="hljs-attr">html:</span> <span class="hljs-string">&#x27;<span class="hljs-template-variable">&#123;&#123; template &quot;email.to.html&quot; .&#125;&#125;</span>&#x27;</span><br>    <span class="hljs-comment">#from: &#x27;用户邮箱@sina.com&#x27;</span><br>    <span class="hljs-comment">#smarthost: &#x27;smtp.sina.com:25&#x27;</span><br>    <span class="hljs-comment">#auth_username: &#x27;用户邮箱@sina.com&#x27;</span><br>    <span class="hljs-comment">#auth_password: &#x27;密码&#x27;</span><br>    
<span class="hljs-comment">#require_tls: false</span><br></code></pre></td></tr></table></figure>

<p><strong>email.tmpl</strong></p>
<figure class="highlight handlebars"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br></pre></td><td class="code"><pre><code class="hljs handlebars"><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">define</span> <span class="hljs-string">&quot;email.to.html&quot;</span> &#125;&#125;</span><span class="language-xml"></span><br><span class="language-xml"></span><span class="hljs-template-variable">&#123;&#123;<span class="hljs-name">-</span> if gt (<span class="hljs-name">len</span> .Alerts.Firing) <span class="hljs-number">0</span> -&#125;&#125;</span><span class="language-xml"></span><br><span class="language-xml"><span class="hljs-tag">&lt;<span 
class="hljs-name">h2</span>&gt;</span>告警<span class="hljs-tag">&lt;/<span class="hljs-name">h2</span>&gt;</span></span><br><span class="language-xml"><span class="hljs-tag">&lt;<span class="hljs-name">table</span> <span class="hljs-attr">border</span>=<span class="hljs-string">&quot;5&quot;</span>&gt;</span></span><br><span class="language-xml">    <span class="hljs-tag">&lt;<span class="hljs-name">tr</span>&gt;</span><span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>报警项<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>实例<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>报警详情<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>报警级别<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>开始时间<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">    <span class="hljs-tag">&lt;/<span class="hljs-name">tr</span>&gt;</span></span><br><span class="language-xml">    </span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">range</span> $i, $alert <span class="hljs-attr">:</span>= .Alerts &#125;&#125;</span><span class="language-xml"></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">tr</span>&gt;</span><span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">index</span> $alert.Labels <span 
class="hljs-string">&quot;alertname&quot;</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">            <span class="hljs-tag">&lt;<span class="hljs-name">td</span> <span class="hljs-attr">style</span>=<span class="hljs-string">&quot;color:#32CD32&quot;</span> &gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">index</span> $alert.Labels <span class="hljs-string">&quot;instance&quot;</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">            <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">index</span> $alert.Annotations <span class="hljs-string">&quot;description&quot;</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">            <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">$alert.Labels.severity</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">            <span class="hljs-tag">&lt;<span class="hljs-name">td</span> <span class="hljs-attr">style</span>=<span class="hljs-string">&quot;color:#FF7F50&quot;</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">$alert.StartsAt.Local.Format</span> <span class="hljs-string">&quot;2006-01-02 15:04:05&quot;</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span 
class="hljs-tag">&lt;/<span class="hljs-name">tr</span>&gt;</span></span><br><span class="language-xml">    </span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">end</span> &#125;&#125;</span><span class="language-xml"></span><br><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">table</span>&gt;</span></span><br><span class="language-xml"></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">end</span> &#125;&#125;</span><span class="language-xml"></span><br><span class="language-xml"></span><span class="hljs-template-variable">&#123;&#123;<span class="hljs-name">-</span> if gt (<span class="hljs-name">len</span> .Alerts.Resolved) <span class="hljs-number">0</span> -&#125;&#125;</span><span class="language-xml"></span><br><span class="language-xml"><span class="hljs-tag">&lt;<span class="hljs-name">h2</span>&gt;</span>已经恢复<span class="hljs-tag">&lt;/<span class="hljs-name">h2</span>&gt;</span></span><br><span class="language-xml"><span class="hljs-tag">&lt;<span class="hljs-name">table</span> <span class="hljs-attr">border</span>=<span class="hljs-string">&quot;5&quot;</span>&gt;</span></span><br><span class="language-xml">    <span class="hljs-tag">&lt;<span class="hljs-name">tr</span>&gt;</span><span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>报警项<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>实例<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>报警详情<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>报警级别<span class="hljs-tag">&lt;/<span 
class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>开始时间<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">                <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span>恢复时间<span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">    <span class="hljs-tag">&lt;/<span class="hljs-name">tr</span>&gt;</span></span><br><span class="language-xml">    </span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">range</span> $i, $alert <span class="hljs-attr">:</span>= .Alerts &#125;&#125;</span><span class="language-xml"></span><br><span class="language-xml">        <span class="hljs-tag">&lt;<span class="hljs-name">tr</span>&gt;</span><span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">index</span> $alert.Labels <span class="hljs-string">&quot;alertname&quot;</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">            <span class="hljs-tag">&lt;<span class="hljs-name">td</span> <span class="hljs-attr">style</span>=<span class="hljs-string">&quot;color:#32CD32&quot;</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">index</span> $alert.Labels <span class="hljs-string">&quot;instance&quot;</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">            <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">index</span> $alert.Annotations <span 
class="hljs-string">&quot;description&quot;</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">            <span class="hljs-tag">&lt;<span class="hljs-name">td</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">$alert.Labels.severity</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">            <span class="hljs-tag">&lt;<span class="hljs-name">td</span> <span class="hljs-attr">style</span>=<span class="hljs-string">&quot;color:#FF7F50&quot;</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">$alert.StartsAt.Local.Format</span> <span class="hljs-string">&quot;2006-01-02 15:04:05&quot;</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">            <span class="hljs-tag">&lt;<span class="hljs-name">td</span> <span class="hljs-attr">style</span>=<span class="hljs-string">&quot;color:#FF7F50&quot;</span>&gt;</span></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">$alert.EndsAt.Local.Format</span> <span class="hljs-string">&quot;2006-01-02 15:04:05&quot;</span> &#125;&#125;</span><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">td</span>&gt;</span></span><br><span class="language-xml">        <span class="hljs-tag">&lt;/<span class="hljs-name">tr</span>&gt;</span></span><br><span class="language-xml">    </span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">end</span> &#125;&#125;</span><span class="language-xml"></span><br><span class="language-xml"><span class="hljs-tag">&lt;/<span class="hljs-name">table</span>&gt;</span></span><br><span 
class="language-xml"></span><span class="hljs-template-variable">&#123;&#123; <span class="hljs-name">end</span> &#125;&#125;</span><span class="hljs-template-variable">&#123;&#123;<span class="hljs-name">-</span> end &#125;&#125;</span><br></code></pre></td></tr></table></figure>
<p><img src="https://files.mdnice.com/user/33721/e7be2120-aad4-4d41-9619-7eb0f4729575.jpg" srcset="/img/loading.gif" lazyload alt="告警邮件示例"></p>
<p>通过以上步骤，我们就构建了一个简单的云原生监控告警系统，能够针对 CPU 和内存资源的使用情况进行实时监控，并在达到预设阈值时发出告警。这不仅有助于及时发现潜在的性能问题，还能够在一定程度上预防系统的不稳定和服务中断。</p>
<p>总结来说，云原生监控入门并不复杂，只要掌握了正确的工具和配置方法，就能够有效地实现资源的监控和告警。Prometheus 和 Alertmanager 作为云原生生态系统中的优秀工具，它们的结合使用，为我们提供了一种强大且灵活的监控解决方案。随着技术的不断进步，未来的监控系统将会更加智能化，但就目前而言，掌握 Prometheus 和 Alertmanager 的使用，已经足够我们构建一个稳定可靠的云原生监控告警系统。</p>

                
              </div>
            
            <hr/>
            <div>
              <div class="post-metas my-3">
                <!-- Category this post is filed under -->
                <div class="post-meta mr-3 d-flex align-items-center">
                  <i class="iconfont icon-category"></i>
                  <span class="category-chains">
                    <span class="category-chain">
                      <a href="/categories/%E7%9B%91%E6%8E%A7/" class="category-chain-item">监控</a>
                    </span>
                  </span>
                </div>
                <!-- Tags attached to this post -->
                <div class="post-meta">
                  <i class="iconfont icon-tags"></i>
                  <a href="/tags/%E7%9B%91%E6%8E%A7/" class="print-no-link">#监控</a>
                </div>
              </div>


              
  

  <!-- Licence box: post title, permalink, author, publication date and licence link -->
  <div class="license-box my-3">
    <div class="license-title">
      <div>云原生监控入门：使用Prometheus、Alertmanager 实现Cpu和内存的监控告警</div>
      <div>http://blog.xhweng.cn/2024/03/27/云原生监控入门：使用Prometheus、Alertmanager 实现Cpu和内存的监控告警/</div>
    </div>
    <div class="license-meta">
      <div class="license-meta-item">
        <div>作者</div>
        <div>xuehaoweng</div>
      </div>
      <div class="license-meta-item license-meta-date">
        <div>发布于</div>
        <div>2024年3月27日</div>
      </div>
      <div class="license-meta-item">
        <div>许可协议</div>
        <div>
          <!-- rel="noopener" stops the page opened in the new tab from reaching back through window.opener -->
          <a class="print-no-link" target="_blank" rel="noopener" href="https://creativecommons.org/licenses/by/4.0/">
            <span class="hint--top hint--rounded" aria-label="BY - 署名">
              <i class="iconfont icon-by"></i>
            </span>
          </a>
        </div>
      </div>
    </div>
    <div class="license-icon iconfont"></div>
  </div>



              
                <div class="post-prevnext my-3">
                  <!-- Left-hand neighbour: full title on wide screens, short label on mobile -->
                  <article class="post-prev col-6">
                    <a href="/2024/03/27/Prometheus%E9%83%A8%E7%BD%B2TCP%E5%92%8CICMP%E6%9C%8D%E5%8A%A1%E6%8E%A2%E6%B5%8B%EF%BC%9A%E7%9B%91%E6%8E%A7%E7%9A%84%E8%89%BA%E6%9C%AF%E4%B8%8E%E7%A7%91%E5%AD%A6/" title="Prometheus部署TCP和ICMP服务探测：监控的艺术与科学">
                      <i class="iconfont icon-arrowleft"></i>
                      <span class="hidden-mobile">Prometheus部署TCP和ICMP服务探测：监控的艺术与科学</span>
                      <span class="visible-mobile">上一篇</span>
                    </a>
                  </article>
                  <!-- Right-hand neighbour: same layout, arrow placed after the text -->
                  <article class="post-next col-6">
                    <a href="/2024/01/05/python_API-KEY/" title="Python项目单独设置API-KEY">
                      <span class="hidden-mobile">Python项目单独设置API-KEY</span>
                      <span class="visible-mobile">下一篇</span>
                      <i class="iconfont icon-arrowright"></i>
                    </a>
                  </article>
                </div>
              
            </div>

            
          </article>
        </div>
      </div>
    </div>

    <div class="side-col d-none d-lg-block col-lg-2">
      <aside class="sidebar" style="margin-left: -1rem">
        <div id="toc">
          <p class="toc-header">
            <i class="iconfont icon-list"></i>
            <span>目录</span>
          </p>
          <!-- Empty in the markup; this is the element the tocbot script targets (tocSelector '#toc-body') -->
          <div class="toc-body" id="toc-body"></div>
        </div>
      </aside>
    </div>
  </div>
</div>





  



  



  



  



  







    

    
      <!-- Back-to-top control: a link to "#" (the top of the document) announced as a button via role="button".
           The arrow icon is hidden from assistive technology; the accessible name comes from aria-label. -->
      <a id="scroll-top-button" aria-label="TOP" href="#" role="button">
        <i class="iconfont icon-arrowup" aria-hidden="true"></i>
      </a>
    

    
      <!-- Search dialog. aria-labelledby must reference an existing id, so the heading below carries id="ModalLabel". -->
      <div class="modal fade" id="modalSearch" tabindex="-1" role="dialog" aria-labelledby="ModalLabel"
           aria-hidden="true">
        <div class="modal-dialog modal-dialog-scrollable modal-lg" role="document">
          <div class="modal-content">
            <div class="modal-header text-center">
              <h4 class="modal-title w-100 font-weight-bold" id="ModalLabel">搜索</h4>
              <button type="button" id="local-search-close" class="close" data-dismiss="modal" aria-label="Close">
                <span aria-hidden="true">&times;</span>
              </button>
            </div>
            <div class="modal-body mx-3">
              <div class="md-form mb-5">
                <input type="text" id="local-search-input" class="form-control validate">
                <label data-error="x" data-success="v" for="local-search-input">关键词</label>
              </div>
              <!-- Results container; empty in the static markup -->
              <div class="list-group" id="local-search-result"></div>
            </div>
          </div>
        </div>
      </div>

    

    
  </main>

  <footer>
    <div class="footer-inner">
      <!-- Credits: Hexo and the Fluid theme -->
      <div class="footer-content">
        <a href="https://hexo.io" target="_blank" rel="nofollow noopener"><span>Hexo</span></a> <i class="iconfont icon-love"></i> <a href="https://github.com/fluid-dev/hexo-theme-fluid" target="_blank" rel="nofollow noopener"><span>Fluid</span></a>
      </div>

      <!-- Site-wide visit counters; both containers are hidden (display: none) in the static markup -->
      <div class="statistics">
        <span id="busuanzi_container_site_pv" style="display: none">
          总访问量
          <span id="busuanzi_value_site_pv"></span>
          次
        </span>
        <span id="busuanzi_container_site_uv" style="display: none">
          总访客数
          <span id="busuanzi_value_site_uv"></span>
          人
        </span>
      </div>

      <!-- ICP filing information (China); both registry links use https -->
      <div class="beian">
        <span>
          <a href="https://beian.miit.gov.cn/" target="_blank" rel="nofollow noopener">
            皖ICP备2024038809号-2
          </a>
        </span>
        <!-- NOTE(review): record code 12345678 with a 京 prefix looks like a placeholder next to the 皖 ICP number above; confirm the real public-security filing number -->
        <span>
          <a
            href="https://www.beian.gov.cn/portal/registerSystemInfo?recordcode=12345678"
            rel="nofollow noopener"
            class="beian-police"
            target="_blank"
          >
            <span style="visibility: hidden; width: 0">|</span>
            <img src="/img/police_beian.png" srcset="/img/loading.gif" lazyload alt="police-icon"/>
            <span>京公网安备12345678号</span>
          </a>
        </span>
      </div>
    </div>
  </footer>

  <!-- Scripts -->
  
  <script  src="https://lib.baomitu.com/nprogress/0.2.0/nprogress.min.js" ></script>
  <link  rel="stylesheet" href="https://lib.baomitu.com/nprogress/0.2.0/nprogress.min.css" />

  <script>
    // Configure the NProgress bar (spinner off, trickleSpeed 100), start it
    // right away, and complete it when the window's load event fires.
    NProgress.configure({ showSpinner: false, trickleSpeed: 100 });
    NProgress.start();
    window.addEventListener('load', () => {
      NProgress.done();
    });
  </script>


<script  src="https://lib.baomitu.com/jquery/3.6.4/jquery.min.js" ></script>
<script  src="https://lib.baomitu.com/twitter-bootstrap/4.6.1/js/bootstrap.min.js" ></script>
<script  src="/js/events.js" ></script>
<script  src="/js/plugins.js" ></script>


  <script  src="https://lib.baomitu.com/typed.js/2.0.12/typed.min.js" ></script>
  <script>
    (function (win, doc) {
      // Pass the #subtitle element's data-typed-text value to the theme's
      // typing plugin; do nothing if either the element or the plugin is missing.
      var startTyping = Fluid.plugins.typing;
      var subtitleEl = doc.getElementById('subtitle');
      if (subtitleEl && startTyping) {
        startTyping(subtitleEl.getAttribute('data-typed-text'));
      }
    })(window, document);
  </script>




  
    <script  src="/js/img-lazyload.js" ></script>
  




  
<script>
  Fluid.utils.createScript('https://lib.baomitu.com/tocbot/4.20.1/tocbot.min.js', function() {
    // Callback handed to Fluid.utils.createScript (presumably invoked once the
    // script has loaded). Initialises tocbot to render the table of contents
    // into #toc-body from the headings inside .markdown-body, then makes the
    // #toc container visible if any entries were generated.
    var toc = jQuery('#toc');
    if (toc.length === 0 || !window.tocbot) { return; }

    // jQuery's .offset() returns undefined for an empty selection; fall back
    // to 0 instead of throwing when #board-ctn is not on the page.
    var boardOffset = jQuery('#board-ctn').offset();
    var boardTop = boardOffset ? boardOffset.top : 0;

    // Make the given TOC container visible only when it holds at least one entry.
    function revealIfPopulated(container) {
      if (container.find('.toc-list-item').length > 0) {
        container.css('visibility', 'visible');
      }
    }

    // Defaults below can be overridden by the site configuration in CONFIG.toc.
    window.tocbot.init(Object.assign({
      tocSelector     : '#toc-body',
      contentSelector : '.markdown-body',
      linkClass       : 'tocbot-link',
      activeLinkClass : 'tocbot-active-link',
      listClass       : 'tocbot-list',
      isCollapsedClass: 'tocbot-is-collapsed',
      collapsibleClass: 'tocbot-is-collapsible',
      scrollSmooth    : true,
      includeTitleTags: true,
      headingsOffset  : -boardTop,
    }, CONFIG.toc));
    revealIfPopulated(toc);

    // On a theme refresh, rebuild the TOC and re-evaluate its visibility.
    Fluid.events.registerRefreshCallback(function() {
      if (!('tocbot' in window)) {
        return;
      }
      tocbot.refresh();
      var refreshedToc = jQuery('#toc');
      if (refreshedToc.length === 0 || !tocbot) {
        return;
      }
      revealIfPopulated(refreshedToc);
    });
  });
</script>


  <script src="https://lib.baomitu.com/clipboard.js/2.0.11/clipboard.min.js"></script>

  <!-- Invoke the Fluid theme's codeWidget plugin; clipboard.js is loaded just above (presumably for a copy button; confirm in the theme's plugins.js) -->
  <script>Fluid.plugins.codeWidget();</script>


  
<script>
  Fluid.utils.createScript('https://lib.baomitu.com/anchor-js/4.3.1/anchor.min.js', function() {
    // Callback handed to Fluid.utils.createScript: configures anchor-js from
    // CONFIG.anchorjs and attaches anchors to headings that are direct
    // children of .markdown-body, then repeats the attachment on theme refresh.
    var DEFAULT_HEADINGS = 'h1,h2,h3,h4,h5,h6';

    // Build the comma-separated selector from the configured heading tags.
    function headingSelectors() {
      return (CONFIG.anchorjs.element || DEFAULT_HEADINGS)
        .split(',')
        .map(function(tag) {
          return '.markdown-body > ' + tag.trim();
        })
        .join(', ');
    }

    // Apply the left-placement class when configured, then add the anchors.
    function attachAnchors(api) {
      var selectors = headingSelectors();
      if (CONFIG.anchorjs.placement === 'left') {
        api.options.class = 'anchorjs-link-left';
      }
      api.add(selectors);
    }

    window.anchors.options = {
      placement: CONFIG.anchorjs.placement,
      visible  : CONFIG.anchorjs.visible
    };
    if (CONFIG.anchorjs.icon) {
      window.anchors.options.icon = CONFIG.anchorjs.icon;
    }
    attachAnchors(window.anchors);

    Fluid.events.registerRefreshCallback(function() {
      if ('anchors' in window) {
        // Drop the anchors added earlier before re-adding them.
        anchors.removeAll();
        attachAnchors(anchors);
      }
    });
  });
</script>


  
<script>
  Fluid.utils.createScript('https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.js', () => {
    // Callback handed to createScript: hand over to the theme's fancyBox plugin.
    Fluid.plugins.fancyBox();
  });
</script>


  <!-- Invoke the Fluid theme's imageCaption plugin -->
  <script>Fluid.plugins.imageCaption();</script>

  <script  src="/js/local-search.js" ></script>

  <script defer src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js" ></script>





<!-- 主题的启动项，将它保持在最底部 -->
<!-- the boot of the theme, keep it at the bottom -->
<script  src="/js/boot.js" ></script>


  

  <noscript>
    <div class="noscript-warning">博客在允许 JavaScript 运行的环境下浏览效果更佳</div>
  </noscript>
</body>
</html>
